## Note: please run L_1_US_structural_topic_models.R prior to this file and save US_STM_result_X.Rdata.

#### setting environment ####
require(quanteda)
require(stm)
require(stringi)
require(runjags)
require(coda)
require(psych)
require(label.switching)

newspaper.names <- c("LAT", "NYT", "SFC", "WP", "WSJ", "WT")

#### data preparation ####
# document-feature matrix of newspaper editorials published from December 1, 2019 to January 31, 2020
load("US_dfm.Rdata")

#### estimate the model with varying number of topics from 21 to 35 ####
for (i in 21:35) {
  # estimation results of the i-topic model
  load(paste0("US_STM_result_", i, ".Rdata"))
  ## estimating the Wordfish model for each topic
  topic.specific.positions <- matrix(NA, 6, i)
  rownames(topic.specific.positions) <- newspaper.names
  wordfish.result <- list()
  for (j in 1:i) {
    # document-feature matrix for topic j (taking a weighted sum of the frequency of term)
    weighted.dfm <- us.dfm * tcrossprod(STM.result$theta[, j], rep(1, ncol(us.dfm)))
    # compress the document-feature matrix at the newspaper level
    dfm.matrix <- matrix(NA, 6, ncol(us.dfm))
    rownames(dfm.matrix) <- newspaper.names
    colnames(dfm.matrix) <- colnames(us.dfm)
    for (k in 1:6) {
      dfm.matrix[k, ] <- round(colSums(weighted.dfm[us.dfm@docvars$newspaper == newspaper.names[k], ]))
    }
    # remove words not used by two or more newspapers
    dfm.matrix <- dfm.matrix[, -1 * which(colSums(dfm.matrix > 0) < 2)]
    # remove newspapers for which the weighted frequency of words is less than 100
    dfm.matrix <- dfm.matrix[rowSums(dfm.matrix) > 99, ]
    # convert a matrix to a dfm object
    pseudo.text <- rep("", nrow(dfm.matrix))
    for (k in 1:nrow(dfm.matrix)) {
      for (l in 1:ncol(dfm.matrix)) {
        pseudo.text[k] <- paste(pseudo.text[k], 
                                paste(rep(colnames(dfm.matrix)[l], dfm.matrix[k, l]), collapse = " "))
      }
    }
    Wordfish.dfm <- dfm(pseudo.text, what = "fastestword")
    # estimate the Wordfish model
    set.seed(12345)
    wordfish.result[[j]] <- textmodel_wordfish(Wordfish.dfm)
    # record topic-specific positions of the newspapers
    newspaper.ID <- match(rownames(dfm.matrix), newspaper.names)
    topic.specific.positions[newspaper.ID, j] <- wordfish.result[[j]]$theta
  }
  ## estimate the factor analysis model
  # function to create initial values
  initial.fun <- function(seed1, seed2) {
    set.seed(seed1)
    b <- rnorm(i)
    x <- rnorm(6, 0, 0.01)
    p <- runif(i, 0.5, 1)
    list(beta = b, x = x, inv.phi = p,
         .RNG.name = "base::Wichmann-Hill", .RNG.seed = seed2)
  }
  # create initial values
  initial.values <- list()
  for (j in 1:3) {
    initial.values[[j]] <- initial.fun(100 * j, 1000 * j)
  }
  # estimate a factor analysis model using JAGS
  FA.result.raw <- run.jags(model = "factor_analysis_JAGS_preliminary.R", 
                            monitor = c("x", "beta", "phi", "deviance"), 
                            data = list(y = topic.specific.positions, n = 6, m = i), 
                            inits = initial.values, 
                            n.chains = 3, burnin = 5000, sample = 5000, modules = c("glm", "dic"), 
                            summarise = FALSE, thin = 10, method = "parallel")
  ## postprocess for sign reversal using the pivotal reordering algorithm
  FA.result.list <- list()
  for (j in 1:3) {
    mcmc.pars <- array(NA, c(5000, 2, 6 + i * 2))
    mcmc.pars[, 1, 1:(6 + i)] <- FA.result.raw$mcmc[[j]][, 1:(6 + i)]
    mcmc.pars[, 2, 1:(6 + i)] <- -1 * FA.result.raw$mcmc[[j]][, 1:(6 + i)]
    mcmc.pars[, 1, (7 + i):(6 + i * 2)] <- mcmc.pars[, 2, (7 + i):(6 + i * 2)] <- 
      FA.result.raw$mcmc[[j]][, (7 + i):(6 + i * 2)]
    pivot <- which.min(unlist(FA.result.raw$mcmc[[j]][, "deviance"]))
    relabeling <- pra(mcmc.pars, mcmc.pars[pivot, , ])$permutations
    parmuted.mcmc.pars <- permute.mcmc(mcmc.pars, relabeling)$output[, 1, ]
    FA.result.list[[j]] <- cbind(-1 * sign(mean(parmuted.mcmc.pars[, 1])) * parmuted.mcmc.pars[, 1:(6 + i)], 
                                 parmuted.mcmc.pars[, (7 + i):(6 + i * 2)])
  }
  FA.result <- mcmc.list(mcmc(FA.result.list[[1]]), mcmc(FA.result.list[[2]]), mcmc(FA.result.list[[3]]))
  # convergence check
  print(which(gelman.diag(FA.result, multivariate = FALSE)[[1]][, 1] > 1.1))
  # posterior draws of ideal points
  ideal.points.draws <- t(rbind(FA.result[[1]][, 1:6], 
                                FA.result[[2]][, 1:6], 
                                FA.result[[3]][, 1:6])) * 10
  rownames(ideal.points.draws) <- newspaper.names
  ## summary statistics
  # point estimates (posterior means)
  ideal.points.mean <- rowMeans(ideal.points.draws)
  # 95% credible intervals (highest posterior density intervals)
  ideal.points.CI <- HPDinterval(as.mcmc(t(ideal.points.draws)))
  # ascending order of the point estimates
  ideal.points.order <- order(ideal.points.mean, decreasing = TRUE)
  Wordshoal.list <- list(ideal.points.mean = ideal.points.mean, 
                         ideal.points.CI = ideal.points.CI, 
                         ideal.points.order = ideal.points.order, 
                         wordfish.result = wordfish.result, 
                         topic.specific.positions = topic.specific.positions, 
                         FA.result = FA.result)
  save(Wordshoal.list, 
      file = paste0("US_Wordshoal_result_", i, ".Rdata"))
  print(paste0("Wordshoal using ", i, "-topic model is finished at ", date(), "."))
}

#### illustrate the results (Figure A.10) ####
cairo_pdf("Figure_A10.pdf",
          width = 6, height = 4.5, pointsize = 8, family = "Helvetica")
layout(matrix(1:15, 3, 5, byrow = TRUE))
par(mar = c(2, 1, 2, 1))
for (i in 21:35) {
  load(paste0("US_Wordshoal_result_", i, ".Rdata"))
  names(Wordshoal.list$ideal.points.mean) <- newspaper.names
  dotchart(Wordshoal.list$ideal.points.mean[Wordshoal.list$ideal.points.order], pch = 19, xlim = c(-2, 2), 
           main = i, xlab = "")
  segments(Wordshoal.list$ideal.points.CI[Wordshoal.list$ideal.points.order, 1], 1:6, 
           Wordshoal.list$ideal.points.CI[Wordshoal.list$ideal.points.order, 2], 1:6)
}
dev.off()